# Exercise 107.1
# Simple linear regression of adult height (adulto) on child height (crianca).
crianca <- c(39, 30, 32, 34, 35, 36, 36, 30)
adulto <- c(71, 63, 63, 67, 68, 68, 70, 64)
alturas <- data.frame(crianca, adulto)
alturas.lm <- lm(adulto ~ crianca, data = alturas)
anova(alturas.lm)
# Confidence intervals for the intercept and slope.
alturas.conf <- confint(alturas.lm)

# In-class discussion
# The columns of `alturas` are (crianca, adulto), so plot() puts crianca on
# the x axis and adulto on the y axis, which matches the fitted model.
plot(alturas)
abline(alturas.lm, col = "blue")

# Constructed data in which the child height is exactly half the adult height,
# so the fitted line has slope 2 and a (numerically) zero intercept.
adulto.esp <- c(60, 65, 70, 72, 62, 80, 59, 64)
crianca.esp <- adulto.esp / 2
alturas.esp <- data.frame(adulto.esp, crianca.esp)
alturas.esp.lm <- lm(adulto.esp ~ crianca.esp)
coef(alturas.esp.lm)
# Plot with the model's own formula. plot(alturas.esp) would put adulto.esp on
# the x axis (first column), so the regression line for adulto.esp ~ crianca.esp
# would be drawn against swapped axes and fall outside the plotting region.
plot(adulto.esp ~ crianca.esp, data = alturas.esp)
abline(alturas.esp.lm, col = "red")

# Exercise 107.2
library(MASS)
# Log-log regression of brain on body for the Animals data, leaving out the
# rows that have log(body) > 8 together with log(brain) < 6.
anim.m2 <- lm(
  log(brain) ~ log(body),
  data = Animals,
  subset = !(log(Animals$body) > 8 & log(Animals$brain) < 6)
)
# Intercept-only (null) model fitted to the same subset of rows.
anim.m0 <- lm(
  log(brain) ~ 1,
  data = Animals,
  subset = !(log(Animals$body) > 8 & log(Animals$brain) < 6)
)
anova(anim.m0, anim.m2)
anova(anim.m2)
# 1
anova(anim.m2)
# The result is the same, since anim.m0 is the null model.
# 2
summary(anim.m0)
# Mean and standard deviation of log(brain) over the same retained rows.
with(Animals, mean(log(brain[!(log(body) > 8 & log(brain) < 6)])))
with(Animals, sd(log(brain[!(log(body) > 8 & log(brain) < 6)])))
# Same null model, obtained by dropping log(body) from anim.m2.
anim.m0 <- update(anim.m2, . ~ . - log(body))
# The mean corresponds to the intercept estimate, and the standard deviation
# corresponds to the residual standard error.

# Exercise 107.3
head(iris)
# Keep only the setosa rows and check that the filter worked.
setosa <- subset(iris, Species == "setosa")
setosa[["Species"]] == "setosa"
head(setosa)

# Sepal width regressed directly on sepal length.
lm.iris <- lm(Sepal.Width ~ Sepal.Length, data = setosa)
lm.iris.coef <- coef(lm.iris)

# Regress each sepal measurement on petal length and keep the residuals ...
larg.sep.com.pet <- lm(Sepal.Width ~ Petal.Length, data = setosa)
comp.sep.com.pet <- lm(Sepal.Length ~ Petal.Length, data = setosa)
res.lar <- resid(larg.sep.com.pet)
res.comp <- resid(comp.sep.com.pet)

# ... then regress the width residuals on the length residuals.
lm.iris.nopetal <- lm(res.lar ~ res.comp)
lm.iris.nopetal.coef <- coef(lm.iris.nopetal)

# Exercise 107.4
pressure
# p is the second column of `pressure` and t is the first.
# NOTE: the variable `t` has the same name as base::t(); calls to t() still
# resolve to the function, but the name is easy to confuse.
p <- pressure[["pressure"]]
t <- pressure[["temperature"]]
# plot(p ~ t)
# no  (NOTE(review): presumably the answer to whether the relation looks
# linear; confirm against the exercise statement)

# Linear, quadratic and cubic fits. Each polynomial model is built twice:
# once by updating the previous model and once by writing the formula in full.
reg1 <- lm(p ~ t)
reg2 <- update(reg1, . ~ . + I(t^2))
reg2.lm <- lm(p ~ t + I(t^2))
reg3 <- update(reg2, . ~ . + I(t^3))
reg3.lm <- lm(p ~ t + I(t^2) + I(t^3))

summary(reg1)
summary(reg2)
summary.reg3 <- summary(reg3)
str(summary.reg3)
# R-squared of the cubic model.
r2 <- summary.reg3[["r.squared"]]

# Exercise 107.5
aves <- read.csv("aves_cerrado.csv", header = TRUE, sep = ";")
head(aves)

# Normalise the misspelled label "ce" to "Ce".
aves$fisionomia[aves$fisionomia == "ce"] <- "Ce"

# Replace missing values in the three bird columns with 0.
aves[c("urubu", "carcara", "seriema")] <- lapply(
  aves[c("urubu", "carcara", "seriema")],
  function(contagem) replace(contagem, is.na(contagem), 0)
)

# One data frame per value of fisionomia.
CL <- aves[aves$fisionomia == "CL", ]
CC <- aves[aves$fisionomia == "CC", ]
Ce <- aves[aves$fisionomia == "Ce", ]

# Regression of seriema on carcara within each subset.
mod.cl <- lm(seriema ~ carcara, data = CL)
mod.cc <- lm(seriema ~ carcara, data = CC)
mod.ce <- lm(seriema ~ carcara, data = Ce)

coef.cl <- coef(mod.cl)
coef.cc <- coef(mod.cc)
coef.ce <- coef(mod.ce)

# Row 2, column 4 of the coefficient table: the p-value of the carcara slope.
s.cl <- summary(mod.cl)
p.cl <- coef(s.cl)[2, 4]
s.cc <- summary(mod.cc)
p.cc <- coef(s.cc)[2, 4]
s.ce <- summary(mod.ce)
p.ce <- coef(s.ce)[2, 4]

# 107b
# Galileo
# Polynomial regressions of h.d on init.h (NOTE(review): presumably horizontal
# distance and initial height; confirm against the exercise statement).
init.h <- c(600, 700, 800, 950, 1100, 1300, 1500)
h.d <- c(253, 337, 395, 451, 495, 534, 573)
mod1 <- lm(h.d ~ init.h)
mod2 <- lm(h.d ~ init.h + I(init.h^2))
mod3 <- lm(h.d ~ init.h + I(init.h^2) + I(init.h^3))
# Compute each summary once; the outer parentheses also print it.
(s.mod1 <- summary(mod1))
(s.mod2 <- summary(mod2))
(s.mod3 <- summary(mod3))
r.mod1 <- s.mod1$r.squared
r.mod2 <- s.mod2$r.squared
r.mod3 <- s.mod3$r.squared
# The R-squared of model 3 explains most of the variation (0.9993735).
# Now comparing the nested models:
anova(mod1, mod2)
# choose model 2 and discard model 1
anova(mod2, mod3)
# choose model 3 and discard model 2

# Birth weight of newborns
babies <- read.table("babies.txt", header = TRUE, sep = "")
head(babies)
# Count of NA values per column as read from the file (a per-column count is
# printed instead of the full logical matrix).
colSums(is.na(babies))
# Sentinel codes in each column are recoded as NA.
babies$bwt[babies$bwt == 999] <- NA
babies$gestation[babies$gestation == 999] <- NA
babies$parity[babies$parity == 9] <- NA
babies$age[babies$age == 99] <- NA
babies$height[babies$height == 99] <- NA
babies$weight[babies$weight == 999] <- NA
babies$smoke[babies$smoke == 9] <- NA
# Total number of non-missing (FALSE) and missing (TRUE) cells.
table(is.na(babies))
# Drop every row that has at least one NA.
babies <- na.exclude(babies)

# Model selection
# Single-predictor models and their R-squared values.
mod1 <- lm(bwt ~ gestation, data = babies)
(r2.mod1 <- summary(mod1)$r.squared)
mod2 <- lm(bwt ~ parity, data = babies)
(r2.mod2 <- summary(mod2)$r.squared)
mod3 <- lm(bwt ~ age, data = babies)
(r2.mod3 <- summary(mod3)$r.squared)
mod4 <- lm(bwt ~ height, data = babies)
(r2.mod4 <- summary(mod4)$r.squared)
mod5 <- lm(bwt ~ weight, data = babies)
(r2.mod5 <- summary(mod5)$r.squared)
mod6 <- lm(bwt ~ smoke, data = babies)
(r2.mod6 <- summary(mod6)$r.squared)
# Full model with all six predictors.
mod7 <- lm(bwt ~ gestation + parity + age + height + weight + smoke,
           data = babies)
(r2.mod7 <- summary(mod7)$r.squared)
# Starting from the most complex model and, in order, removing the predictors
# with the smallest R-squared.
# Full model without age.
mod8 <- lm(bwt ~ gestation + parity + height + weight + smoke, data = babies)
(r2.mod8 <- summary(mod8)$r.squared)
anova(mod7, mod8) # difference not significant
# Full model without age and parity.
mod9 <- lm(bwt ~ gestation + height + weight + smoke, data = babies)
(r2.mod9 <- summary(mod9)$r.squared)
anova(mod7, mod9) # there is a significant difference, therefore model 9,
# without age and parity, should be the chosen one
# NOTE(review): a significant difference in anova(mod7, mod9) means the dropped
# terms do improve the fit, which would argue for keeping the larger model
# (e.g. mod8) rather than mod9. Confirm the intended conclusion against the
# output.
